#############
#############
#############
## Stefan Müller and Sven-Oliver Proksch: 
## Nostalgia in European Party Politics:
## A Text-Based Measurement Approach
## British Journal of Political Science
##
## Script returns all tables and plots 
## reported in SI Section E
## Table A10, Table A11, Table A12, Table A13, Table A14
## Figure A7, Figure A8, Figure A9, Figure A10, Figure A11
#############
#############
#############

library(dplyr)     # CRAN v1.1.2 
library(texreg)    # CRAN v1.38.6 
library(ggplot2)   # CRAN v3.4.2 
library(ggeffects) # CRAN v1.3.0
library(lme4)      # CRAN v1.1-34 
library(cowplot)   # CRAN v1.1.1 
library(Hmisc)     # CRAN v5.1-0 
library(estimatr)  # CRAN v1.0.0 
library(stringr)   # CRAN v1.5.0
library(tidyr)     # CRAN v1.3.0
library(ggcorrplot) # CRAN v0.1.4 

# If the code does not run, one or more packages may have been 
# updated, which may result in errors or conflicts. You can solve this issue
# by installing the package version listed above or by using the 
# groundhog package:
# after installing groundhog using install.packages("groundhog")
# change library(name_of_package) to
# groundhog::groundhog.library(name_of_package, date = "2023-09-04")
# Instead of adjusting the library() call for each package, 
# you can adjust all of them at once using
# the following syntax:
# groundhog.library("
#                   library('pkgA')
#                   library('pkgB')
#                   library('pkgC')", date = "2023-09-04")
# More details are available at: https://groundhogr.com/using/

# load custom ggplot2 scheme
source("function_theme_base.R")

# load manifesto-level dataset; additional variables are merged in below
dat_manifestolevel_raw <- readRDS("data_nostalgia_manifestolevel.rds")

# load and merge ParlGov data
dat_parlgov <- read.csv("parties_parlgov_2023.csv")

# ParlGov party IDs that are excluded because the party appears twice;
# the remaining coding is the one that matches the Manifesto Project
parlgov_ids_excluded <- c("501", "373", "809", "1313")

# Lookup table: Manifesto Project party code (`party`, from ParlGov's `cmp`
# column) and the ParlGov party family.
# "democracy" is replaced by "democratic" so that Christian Democracy reads
# Christian Democratic and Social Democracy reads Social Democratic.
# The table is deliberately left ungrouped: nothing below needs a grouping.
dat_parlgov_unique <- dat_parlgov |> 
    filter(!is.na(party_id), !party_id %in% parlgov_ids_excluded) |> 
    mutate(party = as.character(cmp)) |> 
    distinct(party, family_name) |> 
    filter(!is.na(party)) |> 
    mutate(family_name = str_replace_all(family_name, "democracy", "democratic"))

# both numbers should be identical (one row per party code)
nrow(dat_parlgov_unique)

length(unique(dat_parlgov_unique$party))

table(dat_parlgov_unique$family_name)

# natural join on the columns shared by both data frames
# NOTE(review): presumably only `party` is shared -- confirm against the
# join message and consider passing `by = "party"` explicitly
dat_manifestolevel <- dat_manifestolevel_raw |> 
    left_join(dat_parlgov_unique) 

# nrow should not be different after merging dataset
stopifnot(nrow(dat_manifestolevel) == nrow(dat_manifestolevel_raw))


# Three manifestos carry the placeholder "to be coded" as party family;
# all of them are from the Slovenian National Party
dat_manifestolevel |> 
    filter(family_name %in% "to be coded") |> 
    select(all_of(c("manifesto_id", "partyname", "countryname")))

# Background, from https://en.wikipedia.org/wiki/Slovenian_National_Party:
#
# The Slovenian National Party (Slovene: Slovenska Nacionalna Stranka, SNS)
# is a nationalist political party in Slovenia led by Zmago Jelinčič Plemeniti.
# The party is known for its Euroscepticism and opposes Slovenia's membership
# in NATO. It also engages in what many consider to be historical negationism
# of events in Slovenia during World War II.

# Based on this description, the party is recoded as "Right-wing";
# all other family names (including missing ones) are left untouched
dat_manifestolevel <- dat_manifestolevel |> 
    mutate(family_name = if_else(family_name %in% "to be coded",
                                 "Right-wing",
                                 family_name))


table(dat_manifestolevel$family_name)

# merge CHES party family data

dat_ches_raw <- read.csv("1999-2019_CHES_dataset_means(v2).csv")

# Lookup table with exactly one CHES party family per Manifesto Project
# party code (`cmp_id`).
# - Rows without a party code are dropped, as they cannot identify a party
#   (same treatment as in the ParlGov lookup table).
# - If a party code comes with more than one family, the first family listed
#   in the CHES file is kept. The previous random draw (sample_n() without a
#   seed) could assign a different family on every run of the script.
dat_ches <- dat_ches_raw |> 
    select(party = cmp_id,
           family) |> 
    filter(!is.na(party)) |> 
    distinct() |> 
    group_by(party) |> 
    slice_head(n = 1) |> 
    ungroup() |> 
    mutate(party = as.character(party))

# merge data
n_manifestos_before_ches <- nrow(dat_manifestolevel)

dat_manifestolevel <- dat_manifestolevel |> 
    left_join(dat_ches)

# nrow should not be different after merging dataset
stopifnot(nrow(dat_manifestolevel) == n_manifestos_before_ches)


# first and last election year covered by the data
min(dat_manifestolevel$year)
max(dat_manifestolevel$year)


# get long data frame: one row per manifesto and nostalgia measure
# (all columns whose name contains "per_1000" are treated as measures)
dat_nost_long <- dat_manifestolevel |> 
    select(contains("per_1000"), region, countryname) |> 
    pivot_longer(cols = -c(countryname, region),
                 names_to = "nostalgia_measure",
                 values_to = "value") |> 
    # strip the common prefix; the plain dictionary column has no suffix and
    # therefore keeps its full name, which is handled in the recode below
    mutate(nostalgia_measure = str_remove_all(nostalgia_measure, "nostalgia_sentences_per_1000_")) |> 
    mutate(nostalgia_measure_clean = dplyr::recode(nostalgia_measure,
                                                   "sentiment" = "Dictionary + Sentiment",
                                                   "emb" = "Dictionary + Embeddings",
                                                   "sentiment_emb" = "Dictionary + Embeddings + Sentiment",
                                                   "svm" = "SVM",
                                                   "bert" = "DistilBERT",
                                                   "nostalgia_sentences_per_1000" = "Dictionary"))


# get average nostalgia across measures and countries 
# and 95% bootstrapped confidence intervals

# The bootstrap resamples at random; a fixed seed makes the intervals
# (and therefore Figure A07) identical across runs of the script
set.seed(20230904)

# smean.cl.boot() returns the elements Mean, Lower and Upper, which become
# the columns of the one-row data frame returned for each group
dat_nost_long_cis <- dat_nost_long |> 
    group_by(region, countryname, nostalgia_measure_clean) |> 
    reframe(data.frame(rbind(Hmisc::smean.cl.boot(value, conf.int = 0.95))))

# cross-table (should be 24 observations per classifier -- one for each country)
table(dat_nost_long_cis$nostalgia_measure_clean)

# average of the country means for each measure
dat_nost_avgs <- dat_nost_long_cis |> 
    group_by(nostalgia_measure_clean) |> 
    summarise(mean_nostalgia = mean(Mean))

# fix the order of facets (measures) and legend entries (regions)
dat_nost_long_cis <- dat_nost_long_cis |> 
    mutate(nostalgia_measure_clean = factor(nostalgia_measure_clean,
                                            levels = c("Dictionary",
                                                       "Dictionary + Embeddings",
                                                       "Dictionary + Sentiment",
                                                       "Dictionary + Embeddings + Sentiment",
                                                       "SVM",
                                                       "DistilBERT"))) |>
    mutate(region = factor(region,
                           levels = c("Central and Eastern Europe",
                                      "Southern Europe",
                                      "Western Europe",
                                      "Northern Europe")))

# Figure A07 ----
# Country means with bootstrapped intervals, one panel per nostalgia measure;
# countries are ordered by their mean, regions are told apart by colour/shape
region_shapes <- c(15, 17, 1, 16)
region_colours <- c("darkred", "darkblue", "black", "darkgreen")

fig_a07 <- ggplot(dat_nost_long_cis,
                  aes(x = reorder(countryname, Mean),
                      y = Mean,
                      colour = region,
                      shape = region)) +
    geom_point(size = 3) +
    coord_flip() +
    geom_linerange(aes(ymin = Lower, ymax = Upper)) +
    scale_y_continuous(limits = c(0, 100)) +
    scale_shape_manual(values = region_shapes) +
    scale_color_manual(values = region_colours) +
    facet_wrap(~nostalgia_measure_clean,
               nrow = 2,
               scales = "free_x",
               labeller = label_wrap_gen(20)) +
    labs(x = NULL, 
         y = "Average nostalgic sentences (per 1,000 sentences)") +
    theme(legend.title = element_blank())
fig_a07
ggsave("fig_a07.pdf",
       plot = fig_a07,
       width = 9.3, height = 11)


# dat_reg is the data frame used for all regressions below
dat_reg <- dat_manifestolevel

# get range of loglibcons variable for observations
# included in our regression models (non-missing lagged unemployment)
dat_reg |> 
    filter(!is.na(unemp_lag1)) |> 
    summarise(across(loglibcons,
                     list(min = min, max = max),
                     .names = "{.fn}_libcons"))


# change baseline category for some of the variables:
# each variable becomes a factor whose first level is the stated reference
reference_levels <- c(party_family_recoded = "Nationalist",
                      cabinet_status_lag2 = "Opposition",
                      populism_popu_list_categories = "Other")

for (factor_name in names(reference_levels)) {
    dat_reg[[factor_name]] <- relevel(factor(dat_reg[[factor_name]]),
                                      ref = reference_levels[[factor_name]])
}

# decade is used as a categorical variable (default level order)
dat_reg[["decade"]] <- factor(dat_reg[["decade"]])



# rescale variables for regression models

# Standardize a numeric vector to mean 0 and standard deviation 1.
#
# x:     numeric vector
# na.rm: if TRUE, missing values are ignored when computing the mean and the
#        standard deviation and stay NA in the result; if FALSE (default,
#        the previous behaviour), a single NA turns the whole result into NA
#
# Returns a numeric vector of the same length as x.
rescale_var <- function(x, na.rm = FALSE) {
    (x - mean(x, na.rm = na.rm)) / sd(x, na.rm = na.rm)
}

# test function
rescale_var(x = c(1, 3, 5, 6))


# z transform variables
# Only manifestos with a DistilBERT score are kept; any grouping is removed
# first so that each measure is standardized over the whole sample
dat_reg <- ungroup(filter(dat_reg, !is.na(nostalgia_sentences_per_1000_bert)))

# name of the standardized column -> column it is computed from
rescale_sources <- c(
    nostalgia_base_rescale = "nostalgia_sentences_per_1000",
    nostalgia_base_sent_rescale = "nostalgia_sentences_per_1000_sentiment",
    nostalgia_emb_rescale = "nostalgia_sentences_per_1000_emb",
    nostalgia_emb_sent_rescale = "nostalgia_sentences_per_1000_sentiment_emb",
    nostalgia_bert_rescale = "nostalgia_sentences_per_1000_bert",
    nostalgia_svm_rescale = "nostalgia_sentences_per_1000_svm"
)

for (rescaled_name in names(rescale_sources)) {
    dat_reg[[rescaled_name]] <- rescale_var(dat_reg[[rescale_sources[[rescaled_name]]]])
}



table(dat_reg$party_family_recoded)

# recode party family codings CHES and ParlGov

# CHES family code -> label used in tables and plots;
# smaller families are pooled into "Other"
ches_family_labels <- c(
    "agrarian/centre" = "Other",
    "christdem" = "Christian Democratic",
    "confessional" = "Other",
    "cons" = "Conservative",
    "green" = "Green Party",
    "liberal" = "Liberal",
    "no family" = "Other",
    "rad left" = "Radical Left",
    "rad right" = "Radical Right",
    "regionalist" = "Other",
    "socialist" = "Socialist"
)

# ParlGov families that are pooled into "Other"
parlgov_to_other <- c("Special issue" = "Other",
                      "Agrarian" = "Other")

dat_reg <- dat_reg |> 
    mutate(family_ches = dplyr::recode(family, !!!ches_family_labels),
           # title case, e.g. "Right-wing" becomes "Right-Wing"
           family_parlgov = str_to_title(dplyr::recode(family_name, !!!parlgov_to_other)))






# Plot predicted values for a discrete/factor variable.
#
# data: data frame with the columns x (category), predicted (point estimate)
#       and std.error, e.g. the object returned by ggpredict()
#
# Returns a ggplot object: categories ordered by their predicted value,
# points for the predictions and line ranges of +/- 1.96 standard errors.
plot_pred_discrete <- function(data) {
    
    interval_mapping <- aes(x = reorder(x, predicted),
                            y = predicted,
                            ymin = predicted - 1.96 * std.error,
                            ymax = predicted + 1.96 * std.error)
    
    dotted_grid_x <- theme(
        panel.grid.major.x = element_line(colour = "grey70",
                                          linetype = "dotted")
    )
    
    ggplot(data = data, mapping = interval_mapping) +
        geom_point(size = 3) +
        geom_linerange() +
        labs(x = NULL,
             y = "Predicted Levels of Nostalgia (DistilBERT)") +
        dotted_grid_x
    
}


# compare fixed effects models with random intercepts model
# Outcome in all four models: DistilBERT nostalgic sentences per 1,000
# sentences (unstandardized).

# Mixed-effects model with cultural conservatism (loglibcons) as main
# predictor; random intercepts for countries and parties
lm_compare_lmer <- lmer(
    nostalgia_sentences_per_1000_bert ~
        loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | countryname)  +
        (1 | party),
    data = dat_reg
)

# Same random-intercept structure, with party family replacing loglibcons
lm_compare_lmer_party <- lmer(
    nostalgia_sentences_per_1000_bert ~
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | countryname)  +
        (1 | party),
    data = dat_reg
)


# run as fixed-effects model
# Country fixed effects instead of random intercepts; standard errors are
# clustered by party (se_type = "stata")

lm_compare_fe <-  estimatr::lm_robust(
    nostalgia_sentences_per_1000_bert ~
        loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1,
    fixed_effects = countryname, 
    clusters = party,
    se_type = "stata",
    data = dat_reg)


# Fixed-effects counterpart of lm_compare_lmer_party
lm_compare_fe_party <-  estimatr::lm_robust(
    nostalgia_sentences_per_1000_bert ~
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1,
    fixed_effects = countryname, 
    clusters = party,
    se_type = "stata",
    data = dat_reg)


# print all four models to the console (same order as in Table A10)
screenreg(list(lm_compare_lmer,
               lm_compare_fe,
               lm_compare_lmer_party,
               lm_compare_fe_party),
          include.ci = FALSE)



# Table A10 ----
# Mixed-effects (M1, M3) next to fixed-effects (M2, M4) specifications.
# The caption describes the models as estimated: M1 and M3 contain random
# intercepts for countries and parties only (no election intercepts).
# custom.gof.names lists the mixed-model statistics first, followed by the
# statistics that are only available for the fixed-effects models.
texreg(list(lm_compare_lmer,
            lm_compare_fe,
            lm_compare_lmer_party,
            lm_compare_fe_party),
       include.ci = FALSE, 
       caption.above = TRUE,
       omit.coef = "(Intercept)",
       custom.coef.map = list(
           "loglibcons" = "Cultural Conservatism",
           "cabinet_status_lag2Government" = "Government",
           "vote_share_cmp" = "Vote Share",
           "unemp_lag1" = "Unemployment (t-1)",
           "party_family_recodedChristian Democratic" = "Christ. Dem. (ref.: Nat.)",
           "party_family_recodedConservative" = "Conservative",
           "party_family_recodedEcological" = "Ecological",
           "party_family_recodedLiberal" = "Liberal",
           "party_family_recodedOther" = "Other",
           "party_family_recodedSocial Democratic" = "Social Dem.",
           "party_family_recodedSocialist" = "Socialist"
       ),
       include.variance = FALSE,
       stars = c(0.001, 0.01, 0.05, 0.1),
       custom.gof.names = c("AIC", "BIC",
                            "Log Likelihood",
                            "N", 
                            "N Groups: Parties", 
                            "N Groups: Countries",
                            "R$^2$", 
                            "Adj. R$^2$",
                            "RMSE",
                            "N Clusters (Parties)"),
       symbol = '\\dagger',
       custom.model.names = c("M1 (mixed-effects)",
                              "M2 (fixed-effects)",
                              "M3 (mixed-effects)",
                              "M4 (fixed-effects)"),
       caption = "Predicting nostalgia (DistilBERT) using mixed-effects models with random intercepts for countries and parties (M1 and M3), and fixed-effects models with country fixed effects and standard errors clustered by party (M2 and M4). Intercept omitted from table.",
       fontsize = "footnotesize",
       label = "tab:fixed_effects",
       file = "tab_a10.tex")



# use party family rather than loglibcons
# Base model: standardized dictionary measure as outcome; random intercepts
# for elections, countries and parties
lm_rescale_base_parfam <- lmer(
    nostalgia_base_rescale ~
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)


# repeat for all measures
# update() refits the base model with a different outcome; the right-hand
# side of the formula is kept unchanged (`~.`)

# dictionary + embeddings
lm_rescale_emb_parfam <- update(lm_rescale_base_parfam, 
                                nostalgia_emb_rescale ~.
)


# dictionary + embeddings + sentiment
lm_rescale_emb_sent_parfam <- update(lm_rescale_base_parfam, 
                                     nostalgia_emb_sent_rescale ~.
)


# dictionary + sentiment
lm_rescale_sent_parfam <- update(lm_rescale_base_parfam, 
                                 nostalgia_base_sent_rescale ~.
)

# SVM
lm_rescale_svm_parfam <- update(lm_rescale_base_parfam, 
                                nostalgia_svm_rescale ~.
)

# DistilBERT
lm_rescale_bert_parfam <- update(lm_rescale_base_parfam, 
                                 nostalgia_bert_rescale ~.
)

# print all six models to the console (same order as in Table A11)
screenreg(list(lm_rescale_base_parfam, lm_rescale_emb_parfam,
               lm_rescale_sent_parfam, lm_rescale_emb_sent_parfam,
               lm_rescale_svm_parfam,
               lm_rescale_bert_parfam))


# labels for the goodness-of-fit rows of models with three random intercepts
# NOTE(review): the order of the three "N Groups" labels has to match the
# order in which texreg reports the grouping factors -- confirm against the
# screenreg() output above
gofnames <- c("AIC", "BIC",
              "Log Likelihood",
              "N",
              "N Groups: Parties", 
              "N Groups: Elections",
              "N Groups: Countries")


# Table A11 ----
# Party-family models for all six standardized nostalgia measures:
# M1--M4 are the dictionary-based measures, M5--M6 the classifiers

# models in the order of the table columns
models_a11 <- list(lm_rescale_base_parfam, lm_rescale_emb_parfam,
                   lm_rescale_sent_parfam, lm_rescale_emb_sent_parfam,
                   lm_rescale_svm_parfam,
                   lm_rescale_bert_parfam)

# coefficient name -> row label, in the order of the table rows
coef_labels_a11 <- list(
    "(Intercept)" = "(Intercept)",
    "party_family_recodedChristian Democratic" = "Christ. Dem. (ref.: Nat.)",
    "party_family_recodedConservative" = "Conservative",
    "party_family_recodedEcological" = "Ecological",
    "party_family_recodedLiberal" = "Liberal",
    "party_family_recodedOther" = "Other",
    "party_family_recodedSocial Democratic" = "Social Dem.",
    "party_family_recodedSocialist" = "Socialist",
    "cabinet_status_lag2Government" = "Government",
    "vote_share_cmp" = "Vote Share",
    "unemp_lag1" = "Unemployment (t-1)"
)

texreg(models_a11,
       file = "tab_a11.tex",
       label = "tab:main_rescaled_parfam",
       caption = "Predicting nostalgia for various measurements with standardized dependent variables (mean of 0 and standard deviation of 1). Linear mixed-effects models with random intercepts for countries, parties, and elections. Standard errors in parentheses. 
       M1: Base dictionary; M2: Base dictionary + embeddings dictionary; M3: Base dictionary + positive sentiment; M4: Base dictionary + embeddings dictionary + positive sentiment; M5: Bag-of-words classifier (SVM); M6: Transformer-based classifier (DistilBERT).",
       caption.above = TRUE,
       fontsize = "footnotesize",
       custom.header = list("Dictionary-Based Methods (M1--M4)" = 1:4, "Machine Learning (M5--M6)" = 5:6),
       custom.model.names = c(paste0("M", 1:5), "\\textbf{M6}"),
       custom.coef.map = coef_labels_a11,
       custom.gof.names = gofnames,
       include.variance = FALSE,
       stars = c(0.001, 0.01, 0.05, 0.1),
       symbol = '\\dagger')



# party family by region

# decade as a label ("s" is appended to the decade value) for use as a
# categorical predictor
dat_reg <- dat_reg |> 
    mutate(decade2 = paste0(decade, "s"))


regions <- unique(dat_reg$region)

# Predicted values of the region-specific models are collected in a list and
# bound together once after the loop, instead of re-copying a growing data
# frame in every iteration
pred_by_region <- list()

# The loop variable `i` is kept as is because the model call refers to it
# in `data = filter(dat_reg, region == i)`
for (i in regions) {
    # model for one region; random intercept for parties only
    lm_base_decade_region <- lmer(
        nostalgia_sentences_per_1000_bert ~
            party_family_recoded +
            cabinet_status_lag2 +
            vote_share_cmp +
            unemp_lag1 +
            decade2 +
            (1 | party),
        data = filter(dat_reg, region == i)
    )
    
    # get predicted values for each decade
    pred_partyfam_region_decade <- ggpredict(
        lm_base_decade_region, terms = c("decade2")
    )
    print(pred_partyfam_region_decade)
    
    pred_partyfam_region_decade$region <- i
    
    pred_by_region[[length(pred_by_region) + 1]] <- pred_partyfam_region_decade
}

# store predicted values for models based on each region
# (the empty data frame comes first so that the result is a plain data frame,
# as it was when the rows were appended one region at a time)
dat_pred_region_decades <- bind_rows(data.frame(), pred_by_region)


# order of the region panels in Figure A8
region_panel_order <- c("Central and Eastern Europe",
                        "Southern Europe",
                        "Western Europe",
                        "Northern Europe")

dat_pred_region_decades$region <- factor(dat_pred_region_decades$region,
                                         levels = region_panel_order)

# Figure A8 ----
# Predicted nostalgia by decade, one panel per region; line ranges show
# +/- 1.96 standard errors around the prediction
fig_a08 <- ggplot(data = dat_pred_region_decades, 
                  mapping = aes(x = x, y = predicted)) +
    geom_point(size = 3) +
    geom_linerange(mapping = aes(ymin = predicted - 1.96 * std.error,
                                 ymax = predicted + 1.96 * std.error),
                   linewidth = 0.8) +
    facet_wrap(~region,
               nrow = 1,
               scales = "free_y",
               labeller = label_wrap_gen(width = 18)) +
    scale_y_continuous(breaks = c(seq(0, 90, 30)),
                       limits = c(0, 110)) +
    labs(x = NULL, y = "Predicted Levels of Nostalgia (DistilBERT)") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          panel.grid.major.y = element_line(colour = "grey70",
                                            linetype = "dotted"))
fig_a08
ggsave("fig_a08.pdf", 
       plot = fig_a08,
       width = 9, height = 4)



# regression models for each region
# Each model is estimated on the manifestos of one region only and contains a
# random intercept for parties (no country or election intercepts)

# Northern Europe; also serves as the template for the other regions
lm_north <- lmer(
    nostalgia_sentences_per_1000_bert ~
        +  party_family_recoded +
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        decade2 +
        (1 | party),
    data = filter(dat_reg, 
                  region == "Northern Europe"))


# update() refits the same formula on the data of another region

# Central and Eastern Europe
lm_cce <- update(
    lm_north,
    data = filter(dat_reg, 
                  region == "Central and Eastern Europe"))


# Southern Europe
lm_south <- update(
    lm_north,
    data = filter(dat_reg, 
                  region == "Southern Europe"))


# Western Europe
lm_west <- update(
    lm_north,
    data = filter(dat_reg, 
                  region == "Western Europe"))

# number of distinct values of the party family variable
length(unique(dat_reg$party_family_recoded))

# console output; note that the model order here (West, East, South, North)
# differs from the order used in Table A12 (East, South, West, North)
screenreg(list(lm_west, lm_cce, lm_south,
               lm_north),
          custom.model.names = c("M1: Western E.", "M2: Eastern E.",
                                 "M3: Southern E.", "M4: Northern E."))

# labels for the goodness-of-fit rows of the region models
# (a single grouping factor: parties)
gofnames_regions <- c("AIC", "BIC",
                      "Log Likelihood",
                      "N",
                      "N Groups: Parties")


# Table A12 ----
# Region-specific models (East, South, West, North).
# Two corrections compared with the earlier version of this table:
# - the Government coefficient (cabinet_status_lag2) is part of all four
#   models and is now listed in the coefficient map instead of being dropped
# - the caption names the random intercepts that the models contain
#   (parties only), in line with the single "N Groups" row
texreg(list(lm_cce, 
            lm_south,
            lm_west, 
            lm_north),
       custom.model.names = c("M1: East", "M2: South",
                              "M3: West", "M4: North"),
       caption.above = TRUE,
       include.variance = FALSE,
       stars = c(0.001, 0.01, 0.05, 0.1),
       symbol = '\\dagger',
       custom.coef.map = list(
           "(Intercept)" = "(Intercept)",
           "party_family_recodedChristian Democratic" = "Party Family: Christian Dem. (ref.: Nationalist)",
           "party_family_recodedConservative" = "Party Family: Conservative",
           "party_family_recodedEcological" = "Party Family: Ecological",
           "party_family_recodedLiberal" = "Party Family: Liberal",
           "party_family_recodedOther" = "Party Family: Other",
           "party_family_recodedSocial Democratic" = "Party Family: Social Dem.",
           "party_family_recodedSocialist" = "Party Family: Socialist",
           "cabinet_status_lag2Government" = "Government",
           "vote_share_cmp" = "Vote Share",
           "unemp_lag1" = "Unemployment (t-1)",
           "decade21970s" = "Decade: 1970s",
           "decade21980s" = "Decade: 1980s",
           "decade21990s" = "Decade: 1990s",
           "decade22000s" = "Decade: 2000s",
           "decade22010s" = "Decade: 2010s"
       ),
       custom.gof.names = gofnames_regions,
       caption = "Predicting nostalgic sentences (per 1,000 sentences; DistilBERT) for each region. Linear mixed-effects models with random intercepts for parties. Standard errors in parentheses.",
       fontsize = "footnotesize",
       label = "tab:base_regions",
       file = "tab_a12.tex")



# run models with and without controls
# Outcome in all four models: standardized DistilBERT measure; random
# intercepts for elections, countries and parties


# cultural conservatism, with controls
lm_rescale_bert <- lmer(
    nostalgia_bert_rescale ~
        +  loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)


# party family, with controls
# (overwrites the object of the same name created earlier via update();
# outcome, predictors and random intercepts are the same)
lm_rescale_bert_parfam <- lmer(
    nostalgia_bert_rescale ~
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)

# party family, without controls
lm_rescale_bert_partyfam_nocontrols <- lmer(
    nostalgia_bert_rescale ~
        party_family_recoded + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)


# cultural conservatism, without controls
lm_rescale_bert_loglibcons_nocontrols <- lmer(
    nostalgia_bert_rescale ~
        loglibcons + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)

# Table A13 ----
# DistilBERT models with and without control variables:
# M1/M2 use cultural conservatism, M3/M4 use party family.
# Cabinet status is a control variable, not a party family, and is therefore
# labelled "Government" (as in the other tables).
texreg(list(lm_rescale_bert_loglibcons_nocontrols,
            lm_rescale_bert,
            lm_rescale_bert_partyfam_nocontrols, 
            lm_rescale_bert_parfam),
       caption.above = TRUE,
       custom.coef.map = list(
           "(Intercept)" = "(Intercept)",
           "loglibcons" = "Cultural Conservatism",
           "party_family_recodedChristian Democratic" = "Party Family: Christ. Dem. (ref.: Nat.)",
           "party_family_recodedConservative" = "Party Family: Conservative",
           "party_family_recodedEcological" = "Party Family: Ecological",
           "party_family_recodedLiberal" = "Party Family: Liberal",
           "party_family_recodedOther" = "Party Family: Other",
           "party_family_recodedSocial Democratic" = "Party Family: Social Dem.",
           "party_family_recodedSocialist" = "Party Family: Socialist",
           "cabinet_status_lag2Government" = "Government",
           "vote_share_cmp" = "Vote Share",
           "unemp_lag1" = "Unemployment (t-1)"
       ),
       include.variance = FALSE,
       stars = c(0.001, 0.01, 0.05, 0.1),
       symbol = '\\dagger',
       custom.model.names = c("M1", "M2", "M3", "M4"),
       custom.gof.names = gofnames,
       caption = "Predicting nostalgia based on DistilBERT measures with standardized dependent variables (mean of 0 and standard deviation of 1), with and without control variables. Linear mixed-effects models with random intercepts for countries, parties, and elections. Standard errors in parentheses.",
       fontsize = "footnotesize",
       label = "tab:main_rescaled_controls",
       file = "tab_a13.tex")



# number of manifestos with a ParlGov party family
sum(!is.na(dat_reg$family_parlgov))

# recode party families
# For each of the three codings, the families named below are pooled into
# "Other"; the resulting factor has four levels with "Other" as reference.
# Values that are not among the four levels become NA.
ches_to_other <- c("Green Party" = "Other",
                   "Liberal" = "Other",
                   "Radical Left" = "Other",
                   "Socialist" = "Other")

cmp_to_other <- c("Ecological" = "Other",
                  "Liberal" = "Other",
                  "Social Democratic" = "Other",
                  "Socialist" = "Other")

parlgov_to_other_families <- c("Communist/Socialist" = "Other",
                               "Green/Ecologist" = "Other",
                               "Social Democratic" = "Other",
                               "Liberal" = "Other")

dat_reg <- dat_reg |> 
    mutate(
        family_ches_recoded = factor(
            dplyr::recode(family_ches, !!!ches_to_other),
            levels = c("Other", "Conservative",
                       "Christian Democratic", "Radical Right")
        ),
        family_cmp_recoded = factor(
            dplyr::recode(party_family_recoded, !!!cmp_to_other),
            levels = c("Other", "Conservative",
                       "Christian Democratic", "Nationalist")
        ),
        family_parlgov_recoded = factor(
            dplyr::recode(family_parlgov, !!!parlgov_to_other_families),
            levels = c("Other", "Conservative",
                       "Christian Democratic", "Right-Wing")
        )
    )


table(dat_reg$family_parlgov_recoded)

# get number of available manifestos
sum(!is.na(dat_reg$family_parlgov_recoded))

# run model based on CHES, ParlGov, and CMP party codings
# (this block: CHES coding; unstandardized DistilBERT outcome, random
# intercepts for elections, countries and parties)
lm_chesrecoded <- lmer(
    nostalgia_sentences_per_1000_bert ~
        family_ches_recoded + 
        (1 | election_id)  +  (1 | countryname)  +
        (1 | party),
    data = filter(dat_reg))


# Number of distinct parties with a non-missing value of the given
# party-family column of dat_reg (used in the plot titles)
count_parties_with_family <- function(family_column) {
    has_family <- !is.na(dat_reg[[family_column]])
    n_distinct(dat_reg$party[has_family])
}

# get N parties for plot (CHES)
parties_ches <- count_parties_with_family("family_ches_recoded")

# get N parties for plot (ParlGov)
parties_parlgov <- count_parties_with_family("family_parlgov_recoded")

# get N parties for plot (CMP)
parties_cmp <- count_parties_with_family("family_cmp_recoded")


# Panel for Figure A9: predicted values from plot_pred_discrete(), flipped so
# that the families are listed on the vertical axis, with a common 0-60
# scale and a panel title
plot_family_panel <- function(predictions, panel_title) {
    plot_pred_discrete(predictions) +
        coord_flip() +
        scale_y_continuous(breaks = c(seq(0, 60, 10)),
                           limits = c(0, 60)) +
        labs(title = panel_title)
}

# predicted values for CHES
pred_chesfamily <- ggpredict(
    lm_chesrecoded, terms = c("family_ches_recoded")
)

p_chesfamily <- plot_family_panel(
    pred_chesfamily,
    paste0("Party Family (Chapel Hill Expert Survey): ", parties_ches, " Parties")
)
p_chesfamily

# run model with CMP party family coding
lm_cmprecoded <- lmer(
    nostalgia_sentences_per_1000_bert ~
        family_cmp_recoded + 
        (1 | election_id)  +  (1 | countryname)  +
        (1 | party),
    data = filter(dat_reg))


# predicted values CMP party family coding
pred_cmpfamily <- ggpredict(
    lm_cmprecoded, terms = c("family_cmp_recoded")
)

# plot
p_cmpfamily <- plot_family_panel(
    pred_cmpfamily,
    paste0("Party Family (Manifesto Project): ", parties_cmp, " Parties")
)
p_cmpfamily

# run model with ParlGov party family coding
lm_parlgovrecoded <- lmer(
    nostalgia_sentences_per_1000_bert ~
        family_parlgov_recoded + 
        (1 | election_id)  +  (1 | countryname)  +
        (1 | party),
    data = filter(dat_reg))


# predicted values with ParlGov coding
pred_parlgovfamily <- ggpredict(
    lm_parlgovrecoded, terms = c("family_parlgov_recoded")
)

# plot
p_parlgovfamily <- plot_family_panel(
    pred_parlgovfamily,
    paste0("Party Family (ParlGov): ", parties_parlgov, " Parties")
)
p_parlgovfamily


# Figure A9 ----
# three panels stacked vertically: Manifesto Project, ParlGov, CHES
fig_a09 <- plot_grid(plotlist = list(p_cmpfamily, p_parlgovfamily, p_chesfamily),
                     nrow = 3)
fig_a09
ggsave("fig_a09.pdf", plot = fig_a09, width = 9, height = 8)


# compare regression results
models_family_codings <- list(lm_cmprecoded, 
                              lm_parlgovrecoded,
                              lm_chesrecoded)
screenreg(models_family_codings)


# Table A14 ----
# One model per party-family coding: M1 Manifesto Project, M2 ParlGov,
# M3 CHES; "Other" is the reference category in each model

# coefficient name -> row label, in the order of the table rows
coef_labels_a14 <- list(
    "(Intercept)" = "(Intercept)",
    "family_cmp_recodedConservative" = "Party Family (Manifesto Project): Conservative (ref.: Other)",
    "family_cmp_recodedChristian Democratic" = "Party Family (Manifesto Project): Christian Democratic",
    "family_cmp_recodedNationalist" = "Party Family (Manifesto Project): Nationalist",
    "family_parlgov_recodedConservative" = "Party Family (ParlGov): Conservative (ref.: Other)",
    "family_parlgov_recodedChristian Democratic" = "Party Family (ParlGov): Christian Democratic",
    "family_parlgov_recodedRight-Wing" = "Party Family (ParlGov): Right-Wing",
    "family_ches_recodedConservative" = "Party Family (CHES): Conservative (ref.: Other)",
    "family_ches_recodedChristian Democratic" = "Party Family (CHES): Christian Democratic",
    "family_ches_recodedRadical Right" = "Party Family (CHES): Radical Right"
)

texreg(list(lm_cmprecoded, 
            lm_parlgovrecoded,
            lm_chesrecoded),
       file = "tab_a14.tex",
       label = "tab:base_partyfam",
       caption = "Predicting nostalgic sentences (per 1,000 sentences; DistilBERT). 
       Linear mixed-effects models with random
intercepts for countries, parties, and election. Standard errors in parentheses.",
       caption.above = TRUE,
       fontsize = "footnotesize",
       custom.model.names = paste0("M", 1:3),
       custom.coef.map = coef_labels_a14,
       custom.gof.names = gofnames,
       include.variance = FALSE,
       stars = c(0.001, 0.01, 0.05, 0.1),
       symbol = '\\dagger')


# correlation between CHES left-right measures, RILE
# and state involvement in the economy scale from Lowe et al. (2011)

# get party ID and relevant variables from the CHES data

dat_ches <- dat_ches_raw |> 
    select(party = cmp_id,
           ches_year = year, ches_lr_econ = lrecon,
           ches_lr_gen = lrgen, 
           ches_galtan = galtan)

# assign dat_manifestolevel_raw to new object

dat_manifesto_cors <- dat_manifestolevel_raw

# store the join key as a factor in both data frames
dat_manifesto_cors$party <- as.factor(dat_manifesto_cors$party)
dat_ches$party <- as.factor(dat_ches$party)

dat_manifestos <- dat_manifesto_cors |> 
    select(loglibcons, loglibcons_base,
           stateconomy,
           countryname, edate, year,
           party,
           rile, logrile)

# Attach every CHES row of a party to each of that party's manifestos.
# `party` is the only column the two selections above have in common, so it
# is named explicitly instead of relying on a natural join. Parties can
# appear several times on both sides (several elections, several CHES
# waves), hence the declared many-to-many relationship; the closest wave is
# picked in the next step.
dat_joined_cors <- left_join(dat_manifestos, 
                             dat_ches,
                             by = "party",
                             relationship = "many-to-many")


nrow(dat_joined_cors)

# get difference between year (election year) and ches_year (survey year)
# and keep, for each manifesto, only the closest CHES survey
# NOTE(review): when two surveys are equally distant from an election, the
# row kept appears to depend on the row order of the CHES data, because
# `-year` presumably does not vary within one election -- confirm.

dat_joined_cors <- dat_joined_cors |> 
    mutate(diff_years = abs(year - ches_year)) |> 
    arrange(countryname, party, diff_years, -year) |> 
    group_by(countryname, party, edate) |> 
    mutate(number = row_number()) |> 
    filter(number == 1) |> # select closest survey
    filter(diff_years < 4) # select only differences of fewer than 4 years

nrow(dat_joined_cors)

min(dat_joined_cors$year)
max(dat_joined_cors$year)


# drop the grouping, convert the CHES scales to numeric, keep only rows with
# a matched CHES survey, and add a label for the decade of the survey
dat_joined_cors <- dat_joined_cors |> 
    ungroup() |> 
    mutate(across(c(ches_galtan, ches_lr_econ, ches_lr_gen), as.numeric)) |> 
    filter(!is.na(ches_year)) |> 
    mutate(ches_decade = paste0("Decade: ", substr(ches_year, 1, 3), "0s"))


# variables that enter the correlation matrix
dat_vars <- dat_joined_cors |> 
    select(ches_lr_gen, ches_lr_econ, logrile, stateconomy)


colnames(dat_vars)

# replace the column names with the labels shown in the figure
# (same order as in the select() call above)
names(dat_vars) <- c(
    "CHES:\nGeneral Left-Right",
    "CHES:\nEconomic Left-Right",
    "Manifestos:\nLog RILE",
    "Manifestos:\nState Involvement in Economy"
)


# pairwise correlations between the four measures
cors <- cor(dat_vars, use = "pairwise.complete.obs")

# correlation matrix with coefficients printed in every cell; all three
# fill colours are white and the legend is hidden
p_cors_lr <- ggcorrplot(
    cors,
    lab = TRUE,
    lab_size = 4,
    lab_col = "grey20",
    show.diag = TRUE,
    colors = rep("white", 3),
    legend.title = "Correlation",
    ggtheme = theme_baser
) +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 90, vjust = 0.5))


# Figure A10 ---- 
# The correlation plot takes 80% of the width; the NULL panel leaves the
# remaining 20% empty. The grid is stored and passed to ggsave() explicitly
# so that the saved file does not depend on ggplot2's last_plot() state.
fig_a10 <- cowplot::plot_grid(p_cors_lr, NULL, nrow = 1,
                              rel_widths = c(0.8, 0.2))
fig_a10
ggsave("fig_a10.pdf", 
       plot = fig_a10,
       width = 9, height = 5)

# compare involvement in economy and loglibcons


# Mixed-effects model of nostalgic sentences per 1,000 sentences
# (DistilBERT measure) that enters loglibcons and stateconomy jointly,
# together with cabinet_status_lag2, vote_share_cmp and unemp_lag1.
# Random intercepts for elections, countries, and parties.
# Note: the object name spells "stateeconomy", the variable is "stateconomy".
lm_base_bert_loglibcons_stateeconomy <- lmer(
    nostalgia_sentences_per_1000_bert ~
        loglibcons + 
        stateconomy +
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname)  +
        (1 | party),
    data = dat_reg
)


# Plot predicted values across a continuous predictor.
# `data` must provide the columns x, predicted and std.error; the grey band
# spans predicted +/- 1.96 * std.error. The x-axis title is left empty so
# that callers can set it with labs().
plot_pred_continuous <- function(data) {
    
    band_mapping <- aes(x = x,
                        y = predicted,
                        ymin = predicted - 1.96 * std.error,
                        ymax = predicted + 1.96 * std.error)
    x_breaks <- seq(-10, 10, by = 2)
    
    p <- ggplot(data = data, mapping = band_mapping)
    p <- p + geom_ribbon(fill = "grey80")
    p <- p + geom_line(group = 1)
    p <- p + scale_x_continuous(breaks = x_breaks)
    
    p + labs(x = NULL,
             y = "Predicted Levels of Nostalgia\n(DistilBERT)")
}


# state involvement in the economy: predictions at all values of stateconomy
pred_stateconomy <- ggpredict(lm_base_bert_loglibcons_stateeconomy,
                              terms = "stateconomy[all]")

# predicted-values plot; the rug marks the stateconomy values in dat_reg
p_stateconomy <- plot_pred_continuous(data = pred_stateconomy) +
    geom_rug(data = dat_reg,
             mapping = aes(x = stateconomy),
             inherit.aes = FALSE,
             alpha = 0.3,
             linewidth = 0.4) +
    labs(x = "Economic Left-Right Position") +
    scale_y_continuous(limits = c(0, 50),
                       breaks = seq(0, 50, by = 10))


# loglibcons: predicted values at all observed values of loglibcons
pred_loglibcons <- ggpredict(
    lm_base_bert_loglibcons_stateeconomy, terms = c("loglibcons[all]")
)


# predicted-values plot; the rug marks the loglibcons values in dat_reg
# (axis label spelling corrected: "Conservatism")
p_loglibcons <- plot_pred_continuous(data = pred_loglibcons)  +
    scale_y_continuous(breaks = c(seq(0, 50, 10)),
                       limits = c(0, 50)) + 
    labs(x = "Cultural Conservatism") +
    geom_rug(data = dat_reg,
             aes(x = loglibcons), 
             inherit.aes = FALSE,
             linewidth = 0.4,
             alpha = 0.3)
p_loglibcons

# Figure A11 ----
# Panel (a): loglibcons, panel (b): stateconomy. The grid is stored and
# passed to ggsave() explicitly so that the saved file does not depend on
# ggplot2's last_plot() state (p_loglibcons is printed just before this).
fig_a11 <- plot_grid(p_loglibcons, p_stateconomy, 
                     labels = c("(a)", "(b)"),
                     label_size = 15,
                     nrow = 1)
fig_a11
ggsave("fig_a11.pdf",
       plot = fig_a11,
       width = 9, height = 4.5)

